Load data

Duration ~ Region

Relative to the reference region (the model intercept), game duration is highly significantly longer in regions 9 and 20 and highly significantly shorter in regions 3 and 5 (p < 0.001). It is also significantly longer in regions 10 and 38 and significantly shorter in regions 8 and 18 (p < 0.05).

# Treat region as a categorical predictor so that lm() estimates a separate
# effect for each region relative to the first (reference) level.
df[["region"]] <- factor(df[["region"]])
model_region <- lm(formula = duration ~ region, data = df)
# Coefficient table and overall fit statistics.
summary(model_region)
## 
## Call:
## lm(formula = duration ~ region, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1795.7  -332.3   -76.3   245.7  4717.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2157.19      29.57  72.953  < 2e-16 ***
## region3      -252.86      30.25  -8.358  < 2e-16 ***
## region5      -137.49      35.25  -3.901 9.66e-05 ***
## region6        67.58      49.39   1.369  0.17119    
## region7       216.06     132.65   1.629  0.10339    
## region8       -77.00      32.38  -2.378  0.01745 *  
## region9       265.95      68.14   3.903 9.56e-05 ***
## region10      520.56     260.31   2.000  0.04555 *  
## region13       26.06      57.12   0.456  0.64829    
## region14      402.81     518.10   0.777  0.43690    
## region15      -59.34      47.18  -1.258  0.20849    
## region17      -64.23     109.65  -0.586  0.55800    
## region18     -208.42      87.95  -2.370  0.01781 *  
## region19      -62.35     105.66  -0.590  0.55517    
## region20     1227.56     260.31   4.716 2.44e-06 ***
## region25      -74.44     102.13  -0.729  0.46606    
## region38      127.11      46.42   2.739  0.00618 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 517.3 on 9983 degrees of freedom
## Multiple R-squared:  0.04567,    Adjusted R-squared:  0.04414 
## F-statistic: 29.86 on 16 and 9983 DF,  p-value: < 2.2e-16
# Duration by region. region is a factor (converted before fitting), so
# plot() draws one box per region rather than a scatter plot.
plot(
  x = df[["region"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Region",
  ylab = "Duration",
  main = "Duration ~ Region"
)

# Fitted line intentionally left disabled for this categorical predictor.
#abline(model_region, lwd = 2, col = "firebrick")

# Default lm diagnostic plots.
# NOTE(review): the NaN warnings printed below presumably come from
# observations with leverage 1 (e.g. a region with a single match) - confirm.
plot(model_region)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

Duration ~ First blood time

For each additional 1 s delay in first blood, the game’s total duration is predicted to be 0.114 s shorter on average. Statistically, there is a tiny negative slope: a later first blood is associated with marginally shorter games. Practically, that relationship is negligible: first blood time accounts for virtually none of the variability in game duration (R² ≈ 0.0006).

# Simple linear regression of duration on first blood time.
model_first_blood <- lm(formula = duration ~ first_blood_time, data = df)
# Coefficient table and overall fit statistics.
summary(model_first_blood)
## 
## Call:
## lm(formula = duration ~ first_blood_time, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1121.0  -354.5   -96.4   256.2  4753.2 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1991.7149     8.5456 233.071   <2e-16 ***
## first_blood_time   -0.1144     0.0455  -2.515   0.0119 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared:  0.0006324,  Adjusted R-squared:  0.0005324 
## F-statistic: 6.327 on 1 and 9998 DF,  p-value: 0.01191
# Duration against first blood time, with the fitted regression line on top.
plot(
  x = df[["first_blood_time"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "First Blood Time",
  ylab = "Duration",
  main = "Duration ~ First Blood Time"
)
abline(model_first_blood, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_first_blood)

Duration ~ Dire score

There is a positive slope. For each one unit increase in dire score, the game duration increases by 14.4s on average.

# Simple linear regression of duration on the Dire score.
model_dire_score <- lm(formula = duration ~ dire_score, data = df)
# Coefficient table and overall fit statistics.
summary(model_dire_score)
## 
## Call:
## lm(formula = duration ~ dire_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1201.0  -326.5   -86.7   236.0  4428.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1558.9239    12.8889     121   <2e-16 ***
## dire_score    14.4128     0.4118      35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 499.4 on 9998 degrees of freedom
## Multiple R-squared:  0.1092, Adjusted R-squared:  0.1091 
## F-statistic:  1225 on 1 and 9998 DF,  p-value: < 2.2e-16
# Duration against Dire score, with the fitted regression line on top.
plot(
  x = df[["dire_score"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Dire Score",
  ylab = "Duration",
  main = "Duration ~ Dire Score"
)
abline(model_dire_score, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_dire_score)

Duration ~ Radiant score

There is a positive slope. For each one unit increase in radiant score, the game duration increases by 12.6s on average.

# Simple linear regression of duration on the Radiant score.
model_radiant_score <- lm(formula = duration ~ radiant_score, data = df)
# Coefficient table and overall fit statistics.
summary(model_radiant_score)
## 
## Call:
## lm(formula = duration ~ radiant_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1261.9  -324.3   -97.1   236.1  4315.5 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1613.0045    13.4045  120.33   <2e-16 ***
## radiant_score   12.6324     0.4331   29.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 507.9 on 9998 degrees of freedom
## Multiple R-squared:  0.07842,    Adjusted R-squared:  0.07833 
## F-statistic: 850.8 on 1 and 9998 DF,  p-value: < 2.2e-16
# Duration against Radiant score, with the fitted regression line on top.
plot(
  x = df[["radiant_score"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Radiant Score",
  ylab = "Duration",
  main = "Duration ~ Radiant Score"
)
abline(model_radiant_score, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_radiant_score)

Duration ~ Experience gained at 15 mins

The scatterplot is clearly non-linear (it looks polynomial or bell-shaped), so simple linear regression is not a good way to model this relationship.

# Simple linear regression of duration on exp_15min.
model_exp_15min <- lm(formula = duration ~ exp_15min, data = df)
# Coefficient table and overall fit statistics.
summary(model_exp_15min)
## 
## Call:
## lm(formula = duration ~ exp_15min, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1141.7  -355.4   -98.6   255.6  4747.0 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.975e+03  5.290e+00 373.371   <2e-16 ***
## exp_15min   -3.250e-03  1.277e-03  -2.545   0.0109 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared:  0.0006474,  Adjusted R-squared:  0.0005475 
## F-statistic: 6.477 on 1 and 9998 DF,  p-value: 0.01094
# Duration against exp_15min, with the fitted regression line on top.
plot(
  x = df[["exp_15min"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Experience gained at 15 mins",
  ylab = "Duration",
  main = "Duration ~ Experience gained at 15 mins"
)
abline(model_exp_15min, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_exp_15min)

Duration ~ Team fight duration

There is a positive correlation between team fight duration and total duration.

# Simple linear regression of duration on team fight duration.
model_team_duration <- lm(formula = duration ~ teamfight_duration, data = df)
# Coefficient table and overall fit statistics.
summary(model_team_duration)
## 
## Call:
## lm(formula = duration ~ teamfight_duration, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1122.6  -332.2  -102.5   239.6  4463.9 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.629e+03  1.246e+01  130.73   <2e-16 ***
## teamfight_duration 1.038e+00  3.423e-02   30.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.3 on 9998 degrees of freedom
## Multiple R-squared:  0.08422,    Adjusted R-squared:  0.08413 
## F-statistic: 919.5 on 1 and 9998 DF,  p-value: < 2.2e-16
# Duration against team fight duration, with the fitted regression line on top.
plot(
  x = df[["teamfight_duration"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Team fight duration",
  ylab = "Duration",
  main = "Duration ~ Team fight duration"
)
abline(model_team_duration, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_team_duration)

Duration ~ Team fight deaths

There is a positive slope. For one additional team fight death, the total duration increases by 11s.

# Simple linear regression of duration on team fight deaths.
model_team_death <- lm(formula = duration ~ Tteamfight_deaths, data = df)
# Coefficient table and overall fit statistics.
summary(model_team_death)
## 
## Call:
## lm(formula = duration ~ Tteamfight_deaths, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1073.0  -331.2  -105.7   239.6  4412.9 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1638.6520    12.1672  134.68   <2e-16 ***
## Tteamfight_deaths   11.2367     0.3698   30.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.2 on 9998 degrees of freedom
## Multiple R-squared:  0.08454,    Adjusted R-squared:  0.08444 
## F-statistic: 923.2 on 1 and 9998 DF,  p-value: < 2.2e-16
# Duration against team fight deaths, with the fitted regression line on top.
plot(
  x = df[["Tteamfight_deaths"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Team fight deaths",
  ylab = "Duration",
  main = "Duration ~ Team fight deaths"
)
abline(model_team_death, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_team_death)

Duration ~ Team fight frequency

There is a positive slope. For one additional team fight, the total duration increases by 49.6s.

# Simple linear regression of duration on team fight frequency.
model_team_frequency <- lm(formula = duration ~ teamfight_frequency, data = df)
# Coefficient table and overall fit statistics.
summary(model_team_frequency)
## 
## Call:
## lm(formula = duration ~ teamfight_frequency, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1098.4  -333.6  -103.1   241.7  4373.0 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1620.296     12.811  126.48   <2e-16 ***
## teamfight_frequency   49.581      1.646   30.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.6 on 9998 degrees of freedom
## Multiple R-squared:  0.08324,    Adjusted R-squared:  0.08315 
## F-statistic: 907.8 on 1 and 9998 DF,  p-value: < 2.2e-16
# Duration against team fight frequency, with the fitted regression line on top.
plot(
  x = df[["teamfight_frequency"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Team fight frequency",
  ylab = "Duration",
  main = "Duration ~ Team fight frequency"
)
abline(model_team_frequency, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_team_frequency)

Duration ~ Number of Strength heroes picked by Radiant

# Simple linear regression of duration on Radiant's Strength hero picks.
model_strr <- lm(formula = duration ~ Strength_picked_r, data = df)
# Coefficient table and overall fit statistics.
summary(model_strr)
## 
## Call:
## lm(formula = duration ~ Strength_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1110.2  -355.6   -97.5   254.5  4757.8 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1989.473     10.273 193.669   <2e-16 ***
## Strength_picked_r  -10.290      6.189  -1.663   0.0964 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529 on 9998 degrees of freedom
## Multiple R-squared:  0.0002764,  Adjusted R-squared:  0.0001764 
## F-statistic: 2.764 on 1 and 9998 DF,  p-value: 0.09643
# Duration against Radiant's Strength hero picks, with the fitted line on top.
plot(
  x = df[["Strength_picked_r"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Strength heroes picked by Radiant",
  ylab = "Duration",
  main = "Duration ~ Number of Strength heroes picked by Radiant"
)
abline(model_strr, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_strr)

Duration ~ Number of Strength heroes picked by Dire

# Simple linear regression of duration on Dire's Strength hero picks.
model_strd <- lm(formula = duration ~ Strength_picked_d, data = df)
# Coefficient table and overall fit statistics.
summary(model_strd)
## 
## Call:
## lm(formula = duration ~ Strength_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1117.6  -355.8   -98.4   256.0  4745.4 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1991.579     10.295 193.442   <2e-16 ***
## Strength_picked_d  -11.758      6.201  -1.896    0.058 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529 on 9998 degrees of freedom
## Multiple R-squared:  0.0003594,  Adjusted R-squared:  0.0002594 
## F-statistic: 3.595 on 1 and 9998 DF,  p-value: 0.05799
# Duration against Dire's Strength hero picks, with the fitted line on top.
plot(
  x = df[["Strength_picked_d"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Strength heroes picked by Dire",
  ylab = "Duration",
  main = "Duration ~ Number of Strength heroes picked by Dire"
)
abline(model_strd, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_strd)

Duration ~ Number of Intelligence heroes picked by Radiant

There is a negative slope. Every additional intelligence hero picked by Radiant shortens the total game length by 34.5s on average.

# Simple linear regression of duration on Radiant's Intelligence hero picks.
model_intr <- lm(formula = duration ~ Intelligence_picked_r, data = df)
# Coefficient table and overall fit statistics.
summary(model_intr)
## 
## Call:
## lm(formula = duration ~ Intelligence_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1140.8  -353.0   -96.4   254.3  4718.2 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2018.839      9.388  215.04  < 2e-16 ***
## Intelligence_picked_r  -34.461      6.077   -5.67 1.46e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.2 on 9998 degrees of freedom
## Multiple R-squared:  0.003206,   Adjusted R-squared:  0.003106 
## F-statistic: 32.15 on 1 and 9998 DF,  p-value: 1.465e-08
# Duration against Radiant's Intelligence hero picks, with the fitted line.
plot(
  x = df[["Intelligence_picked_r"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Intelligence heroes picked by Radiant",
  ylab = "Duration",
  main = "Duration ~ Number of Intelligence heroes picked by Radiant"
)
abline(model_intr, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_intr)

Duration ~ Number of Intelligence heroes picked by Dire

There is a negative slope. Every additional intelligence hero picked by Dire shortens the total game length by 35.9s on average.

# Simple linear regression of duration on Dire's Intelligence hero picks.
model_intd <- lm(formula = duration ~ Intelligence_picked_d, data = df)
# Coefficient table and overall fit statistics.
summary(model_intd)
## 
## Call:
## lm(formula = duration ~ Intelligence_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1137.5  -353.6   -94.6   255.4  4751.4 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2021.470      9.623 210.067  < 2e-16 ***
## Intelligence_picked_d  -35.911      6.194  -5.798 6.92e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.2 on 9998 degrees of freedom
## Multiple R-squared:  0.003351,   Adjusted R-squared:  0.003251 
## F-statistic: 33.62 on 1 and 9998 DF,  p-value: 6.916e-09
# Duration against Dire's Intelligence hero picks, with the fitted line.
plot(
  x = df[["Intelligence_picked_d"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Intelligence heroes picked by Dire",
  ylab = "Duration",
  main = "Duration ~ Number of Intelligence heroes picked by Dire"
)
abline(model_intd, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_intd)

Duration ~ Number of Agility heroes picked by Radiant

# Simple linear regression of duration on Radiant's Agility hero picks.
model_agir <- lm(formula = duration ~ Agility_picked_r, data = df)
# Coefficient table and overall fit statistics.
summary(model_agir)
## 
## Call:
## lm(formula = duration ~ Agility_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1109.9  -357.2   -98.2   255.2  4762.8 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1969.431      9.765 201.674   <2e-16 ***
## Agility_picked_r    4.753      7.222   0.658     0.51    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared:  4.332e-05,  Adjusted R-squared:  -5.669e-05 
## F-statistic: 0.4331 on 1 and 9998 DF,  p-value: 0.5105
# Duration against Radiant's Agility hero picks, with the fitted line on top.
plot(
  x = df[["Agility_picked_r"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Agility heroes picked by Radiant",
  ylab = "Duration",
  main = "Duration ~ Number of Agility heroes picked by Radiant"
)
abline(model_agir, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_agir)

Duration ~ Number of Agility heroes picked by Dire

# Simple linear regression of duration on Dire's Agility hero picks.
model_agid <- lm(formula = duration ~ Agility_picked_d, data = df)
# Coefficient table and overall fit statistics.
summary(model_agid)
## 
## Call:
## lm(formula = duration ~ Agility_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1105.6  -356.6   -97.6   254.2  4762.4 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1972.963      9.786 201.607   <2e-16 ***
## Agility_picked_d    1.642      7.232   0.227     0.82    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared:  5.156e-06,  Adjusted R-squared:  -9.486e-05 
## F-statistic: 0.05155 on 1 and 9998 DF,  p-value: 0.8204
# Duration against Dire's Agility hero picks, with the fitted line on top.
plot(
  x = df[["Agility_picked_d"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Agility heroes picked by Dire",
  ylab = "Duration",
  main = "Duration ~ Number of Agility heroes picked by Dire"
)
abline(model_agid, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_agid)

Duration ~ Number of Universal heroes picked by Radiant

There is a positive slope. Every additional universal hero picked by Radiant increases the total game length by 35.1s on average.

# Simple linear regression of duration on Radiant's Universal hero picks.
model_unir <- lm(formula = duration ~ Universal_picked_r, data = df)
# Coefficient table and overall fit statistics.
summary(model_unir)
## 
## Call:
## lm(formula = duration ~ Universal_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1160.2  -350.0   -96.1   254.9  4697.8 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1934.036      8.402 230.198  < 2e-16 ***
## Universal_picked_r   35.058      5.615   6.243 4.46e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.1 on 9998 degrees of freedom
## Multiple R-squared:  0.003883,   Adjusted R-squared:  0.003784 
## F-statistic: 38.98 on 1 and 9998 DF,  p-value: 4.465e-10
# Duration against Radiant's Universal hero picks, with the fitted line on top.
plot(
  x = df[["Universal_picked_r"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Universal heroes picked by Radiant",
  ylab = "Duration",
  main = "Duration ~ Number of Universal heroes picked by Radiant"
)
abline(model_unir, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_unir)

Duration ~ Number of Universal heroes picked by Dire

There is a positive slope. Every additional universal hero picked by Dire increases the total game length by 37.5s on average.

# Simple linear regression of duration on Dire's Universal hero picks.
model_unid <- lm(formula = duration ~ Universal_picked_d, data = df)
# Coefficient table and overall fit statistics.
summary(model_unid)
## 
## Call:
## lm(formula = duration ~ Universal_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1141.6  -352.1   -95.1   256.4  4692.4 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1932.135      8.244 234.377  < 2e-16 ***
## Universal_picked_d   37.496      5.561   6.743 1.63e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 527.9 on 9998 degrees of freedom
## Multiple R-squared:  0.004527,   Adjusted R-squared:  0.004428 
## F-statistic: 45.47 on 1 and 9998 DF,  p-value: 1.634e-11
# Duration against Dire's Universal hero picks, with the fitted line on top.
plot(
  x = df[["Universal_picked_d"]],
  y = df[["duration"]],
  pch = 16,
  col = "steelblue",
  xlab = "Number of Universal heroes picked by Dire",
  ylab = "Duration",
  main = "Duration ~ Number of Universal heroes picked by Dire"
)
abline(model_unid, col = "firebrick", lwd = 2)

# Default lm diagnostic plots.
plot(model_unid)